/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * BFTree.java
 * Copyright (C) 2007 Haijian Shi
 *
 */

package weka.classifiers.trees;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.classifiers.Evaluation;
import weka.classifiers.RandomizableClassifier;
import weka.core.AdditionalMeasureProducer;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.matrix.Matrix;

/**
 * <!-- globalinfo-start --> Class for building a best-first decision tree
 * classifier. This class uses binary split for both nominal and numeric
 * attributes. For missing values, the method of 'fractional' instances is used.<br/>
 * <br/>
 * For more information, see:<br/>
 * <br/>
 * Haijian Shi (2007). Best-first decision tree learning. Hamilton, NZ.<br/>
 * <br/>
 * Jerome Friedman, Trevor Hastie, Robert Tibshirani (2000). Additive logistic
 * regression : A statistical view of boosting. Annals of statistics.
 * 28(2):337-407.
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- technical-bibtex-start --> BibTeX:
 * 
 * <pre>
 * &#64;mastersthesis{Shi2007,
 *    address = {Hamilton, NZ},
 *    author = {Haijian Shi},
 *    note = {COMP594},
 *    school = {University of Waikato},
 *    title = {Best-first decision tree learning},
 *    year = {2007}
 * }
 * 
 * &#64;article{Friedman2000,
 *    author = {Jerome Friedman and Trevor Hastie and Robert Tibshirani},
 *    journal = {Annals of statistics},
 *    number = {2},
 *    pages = {337-407},
 *    title = {Additive logistic regression : A statistical view of boosting},
 *    volume = {28},
 *    year = {2000},
 *    ISSN = {0090-5364}
 * }
 * </pre>
 * <p/>
 * <!-- technical-bibtex-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -S &lt;num&gt;
 *  Random number seed.
 *  (default 1)
 * </pre>
 * 
 * <pre>
 * -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console
 * </pre>
 * 
 * <pre>
 * -P &lt;UNPRUNED|POSTPRUNED|PREPRUNED&gt;
 *  The pruning strategy.
 *  (default: POSTPRUNED)
 * </pre>
 * 
 * <pre>
 * -M &lt;min no&gt;
 *  The minimal number of instances at the terminal nodes.
 *  (default 2)
 * </pre>
 * 
 * <pre>
 * -N &lt;num folds&gt;
 *  The number of folds used in the pruning.
 *  (default 5)
 * </pre>
 * 
 * <pre>
 * -H
 *  Don't use heuristic search for nominal attributes in multi-class
 *  problem (default yes).
 * </pre>
 * 
 * <pre>
 * -G
 *  Don't use Gini index for splitting (default yes),
 *  if not information is used.
 * </pre>
 * 
 * <pre>
 * -R
 *  Don't use error rate in internal cross-validation (default yes), 
 *  but root mean squared error.
 * </pre>
 * 
 * <pre>
 * -A
 *  Use the 1 SE rule to make pruning decision.
 *  (default no).
 * </pre>
 * 
 * <pre>
 * -C
 *  Percentage of training data size (0-1]
 *  (default 1).
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * @author Haijian Shi (hs69@cs.waikato.ac.nz)
 * @version $Revision$
 */
public class BFTree extends RandomizableClassifier implements
  AdditionalMeasureProducer, TechnicalInformationHandler {

  /** For serialization. */
  private static final long serialVersionUID = -7035607375962528217L;

  /** pruning strategy: un-pruned */
  public static final int PRUNING_UNPRUNED = 0;
  /** pruning strategy: post-pruning */
  public static final int PRUNING_POSTPRUNING = 1;
  /** pruning strategy: pre-pruning */
  public static final int PRUNING_PREPRUNING = 2;
  /** tags describing the available pruning strategies (for the GUI/options) */
  public static final Tag[] TAGS_PRUNING = {
    new Tag(PRUNING_UNPRUNED, "unpruned", "Un-pruned"),
    new Tag(PRUNING_POSTPRUNING, "postpruned", "Post-pruning"),
    new Tag(PRUNING_PREPRUNING, "prepruned", "Pre-pruning") };

  /** the pruning strategy (one of the PRUNING_* constants above) */
  protected int m_PruningStrategy = PRUNING_POSTPRUNING;

  /** Successor nodes (always two: left/right, since all splits are binary). */
  protected BFTree[] m_Successors;

  /** Attribute used for splitting. */
  protected Attribute m_Attribute;

  /** Split point (for numeric attributes). */
  protected double m_SplitValue;

  /** Split subset (for nominal attributes). */
  protected String m_SplitString;

  /** Class value for a node. */
  protected double m_ClassValue;

  /** Class attribute of a dataset. */
  protected Attribute m_ClassAttribute;

  /** Minimum number of instances at leaf nodes. */
  protected int m_minNumObj = 2;

  /** Number of folds for the pruning. */
  protected int m_numFoldsPruning = 5;

  /** If the node is a leaf node. */
  protected boolean m_isLeaf;

  /**
   * Number of expansions performed so far while growing the current tree.
   * NOTE(review): static, so it is shared across all nodes of the tree being
   * grown — and across BFTree instances; building two trees concurrently
   * would interfere with each other.
   */
  protected static int m_Expansion;

  /**
   * Fixed number of expansions (if no pruning method is used, its value is -1.
   * Otherwise, its value is obtained from internal cross-validation).
   */
  protected int m_FixedExpansion = -1;

  /**
   * Whether to use heuristic search for binary splits on nominal attributes
   * (default true). Note even if its value is true, it is only used when the
   * number of values of a nominal attribute is larger than 4.
   */
  protected boolean m_Heuristic = true;

  /**
   * If use Gini index as the splitting criterion - default (if not, information
   * is used).
   */
  protected boolean m_UseGini = true;

  /**
   * If use error rate in internal cross-validation to fix the number of
   * expansions - default (if not, root mean squared error is used).
   */
  protected boolean m_UseErrorRate = true;

  /** If use the 1SE (one standard error) rule to make the pruning decision. */
  protected boolean m_UseOneSE = false;

  /** Class distributions. */
  protected double[] m_Distribution;

  /** Branch proportions (weight fraction going to each of the two branches). */
  protected double[] m_Props;

  /** Sorted indices (per attribute, instances ordered by attribute value). */
  protected int[][] m_SortedIndices;

  /** Sorted weights (parallel to m_SortedIndices). */
  protected double[][] m_Weights;

  /** Distributions of each attribute for two successor nodes. */
  protected double[][][] m_Dists;

  /** Class probabilities. */
  protected double[] m_ClassProbs;

  /** Total weights. */
  protected double m_TotalWeight;

  /** The training data size (0-1). Default 1. */
  protected double m_SizePer = 1;

  /**
   * Returns a string describing this classifier, suitable for displaying in
   * the explorer/experimenter GUI.
   *
   * @return the description text, including the technical references
   */
  public String globalInfo() {
    StringBuilder info = new StringBuilder();
    info.append("Class for building a best-first decision tree classifier. ");
    info.append("This class uses binary split for both nominal and numeric attributes. ");
    info.append("For missing values, the method of 'fractional' instances is used.\n\n");
    info.append("For more information, see:\n\n");
    info.append(getTechnicalInformation().toString());
    return info.toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing detailed
   * information about the technical background of this class, e.g., paper
   * reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  @Override
  public TechnicalInformation getTechnicalInformation() {
    // primary reference: the master's thesis this implementation is based on
    TechnicalInformation thesis = new TechnicalInformation(Type.MASTERSTHESIS);
    thesis.setValue(Field.AUTHOR, "Haijian Shi");
    thesis.setValue(Field.YEAR, "2007");
    thesis.setValue(Field.TITLE, "Best-first decision tree learning");
    thesis.setValue(Field.SCHOOL, "University of Waikato");
    thesis.setValue(Field.ADDRESS, "Hamilton, NZ");
    thesis.setValue(Field.NOTE, "COMP594");

    // secondary reference: the LogitBoost paper
    TechnicalInformation article = thesis.add(Type.ARTICLE);
    article.setValue(Field.AUTHOR,
      "Jerome Friedman and Trevor Hastie and Robert Tibshirani");
    article.setValue(Field.YEAR, "2000");
    article.setValue(Field.TITLE,
      "Additive logistic regression : A statistical view of boosting");
    article.setValue(Field.JOURNAL, "Annals of statistics");
    article.setValue(Field.VOLUME, "28");
    article.setValue(Field.NUMBER, "2");
    article.setValue(Field.PAGES, "337-407");
    article.setValue(Field.ISSN, "0090-5364");

    return thesis;
  }

  /**
   * Returns default capabilities of the classifier.
   *
   * @return the capabilities of this classifier
   */
  @Override
  public Capabilities getCapabilities() {
    Capabilities caps = super.getCapabilities();
    caps.disableAll();

    // attribute types this learner can process
    caps.enable(Capability.NOMINAL_ATTRIBUTES);
    caps.enable(Capability.NUMERIC_ATTRIBUTES);
    caps.enable(Capability.MISSING_VALUES);

    // only nominal class attributes are supported
    caps.enable(Capability.NOMINAL_CLASS);

    return caps;
  }

  /**
   * Method for building a BestFirst decision tree classifier.
   *
   * Three paths, selected by m_PruningStrategy:
   * <ul>
   * <li>UNPRUNED: grow a single tree on all data with no expansion limit;</li>
   * <li>PREPRUNING: run an internal cross-validation, expanding one node at a
   * time in every fold, and stop when the average test error rises; the number
   * of expansions reached is then used for the final tree;</li>
   * <li>POSTPRUNING: in each fold grow a full tree, recording the test error
   * after every expansion; pick the expansion count with minimal average
   * error (optionally relaxed by the 1SE rule) for the final tree.</li>
   * </ul>
   * In both pruned cases, the final tree is rebuilt on all training data with
   * the chosen, fixed number of expansions.
   *
   * @param data set of instances serving as training data
   * @throws Exception if decision tree cannot be built successfully
   */
  @Override
  public void buildClassifier(Instances data) throws Exception {

    // check the data against the declared capabilities, then work on a copy
    // so the caller's dataset is not modified
    getCapabilities().testWithFail(data);
    data = new Instances(data);
    data.deleteWithMissingClass();

    // build an unpruned tree
    if (m_PruningStrategy == PRUNING_UNPRUNED) {

      // calculate sorted indices, weights and initial class probabilities
      int[][] sortedIndices = new int[data.numAttributes()][0];
      double[][] weights = new double[data.numAttributes()][0];
      double[] classProbs = new double[data.numClasses()];
      double totalWeight = computeSortedInfo(data, sortedIndices, weights,
        classProbs);

      // Compute information of the best split for this node (include split
      // attribute,
      // split value and gini gain (or information gain)). At the same time,
      // compute
      // variables dists, props and totalSubsetWeights.
      double[][][] dists = new double[data.numAttributes()][2][data
        .numClasses()];
      double[][] props = new double[data.numAttributes()][2];
      double[][] totalSubsetWeights = new double[data.numAttributes()][2];
      ArrayList<Object> nodeInfo = computeSplitInfo(this, data, sortedIndices,
        weights, dists, props, totalSubsetWeights, m_Heuristic, m_UseGini);

      // add the node (with all split info) into BestFirstElements
      // (the global priority list of candidate nodes to expand)
      ArrayList<ArrayList<Object>> BestFirstElements = new ArrayList<ArrayList<Object>>();
      BestFirstElements.add(nodeInfo);

      // Make the best-first decision tree.
      // nodeInfo layout (established by computeSplitInfo): index 1 is the
      // split attribute, whose props row we pass down as branch proportions.
      int attIndex = ((Attribute) nodeInfo.get(1)).index();
      m_Expansion = 0;
      // m_FixedExpansion is -1 here (no pruning), i.e. no expansion limit
      makeTree(BestFirstElements, data, sortedIndices, weights, dists,
        classProbs, totalWeight, props[attIndex], m_minNumObj, m_Heuristic,
        m_UseGini, m_FixedExpansion);

      return;
    }

    // the following code is for pre-pruning and post-pruning methods

    // Compute train data, test data, sorted indices, sorted weights, total
    // weights,
    // class probabilities, class distributions, branch proportions and total
    // subset
    // weights for root nodes of each fold for prepruning and postpruning.
    int expansion = 0;

    Random random = new Random(m_Seed);
    Instances cvData = new Instances(data);
    cvData.randomize(random);
    // NOTE(review): the "- 1" drops one instance even when m_SizePer == 1
    // (Instances(dataset, first, toCopy) copies 'toCopy' instances) — looks
    // like an off-by-one; confirm this is intended before changing it.
    cvData = new Instances(cvData, 0,
      (int) (cvData.numInstances() * m_SizePer) - 1);
    cvData.stratify(m_numFoldsPruning);

    // per-fold state: training/test split, best-first list and root node,
    // plus the per-fold sorted indices/weights/class counts and the split
    // statistics for each fold's root (all arrays are parallel over folds)
    Instances[] train = new Instances[m_numFoldsPruning];
    Instances[] test = new Instances[m_numFoldsPruning];
    @SuppressWarnings("unchecked")
    ArrayList<ArrayList<Object>>[] parallelBFElements = new ArrayList[m_numFoldsPruning];
    BFTree[] m_roots = new BFTree[m_numFoldsPruning];

    int[][][] sortedIndices = new int[m_numFoldsPruning][data.numAttributes()][0];
    double[][][] weights = new double[m_numFoldsPruning][data.numAttributes()][0];
    double[][] classProbs = new double[m_numFoldsPruning][data.numClasses()];
    double[] totalWeight = new double[m_numFoldsPruning];

    double[][][][] dists = new double[m_numFoldsPruning][data.numAttributes()][2][data
      .numClasses()];
    double[][][] props = new double[m_numFoldsPruning][data.numAttributes()][2];
    double[][][] totalSubsetWeights = new double[m_numFoldsPruning][data
      .numAttributes()][2];
    @SuppressWarnings("unchecked")
    ArrayList<Object>[] nodeInfo = new ArrayList[m_numFoldsPruning];

    for (int i = 0; i < m_numFoldsPruning; i++) {
      train[i] = cvData.trainCV(m_numFoldsPruning, i);
      test[i] = cvData.testCV(m_numFoldsPruning, i);
      parallelBFElements[i] = new ArrayList<ArrayList<Object>>();
      m_roots[i] = new BFTree();

      // calculate sorted indices, weights, initial class counts and total
      // weights for each training data
      totalWeight[i] = computeSortedInfo(train[i], sortedIndices[i],
        weights[i], classProbs[i]);

      // compute information of the best split for this node (include split
      // attribute,
      // split value and gini gain (or information gain)) in this fold
      nodeInfo[i] = computeSplitInfo(m_roots[i], train[i], sortedIndices[i],
        weights[i], dists[i], props[i], totalSubsetWeights[i], m_Heuristic,
        m_UseGini);

      // compute information for root nodes

      int attIndex = ((Attribute) nodeInfo[i].get(1)).index();

      // copy the per-attribute statistics into the fold's root node so the
      // recursive makeTree calls can read them from the node itself
      m_roots[i].m_SortedIndices = new int[sortedIndices[i].length][0];
      m_roots[i].m_Weights = new double[weights[i].length][0];
      m_roots[i].m_Dists = new double[dists[i].length][0][0];
      m_roots[i].m_ClassProbs = new double[classProbs[i].length];
      m_roots[i].m_Distribution = new double[classProbs[i].length];
      m_roots[i].m_Props = new double[2];

      for (int j = 0; j < m_roots[i].m_SortedIndices.length; j++) {
        m_roots[i].m_SortedIndices[j] = sortedIndices[i][j];
        m_roots[i].m_Weights[j] = weights[i][j];
        m_roots[i].m_Dists[j] = dists[i][j];
      }

      // class probabilities are the normalized class counts (guard against
      // an all-zero count vector, which Utils.normalize would reject)
      System.arraycopy(classProbs[i], 0, m_roots[i].m_ClassProbs, 0,
        classProbs[i].length);
      if (Utils.sum(m_roots[i].m_ClassProbs) != 0) {
        Utils.normalize(m_roots[i].m_ClassProbs);
      }

      System.arraycopy(classProbs[i], 0, m_roots[i].m_Distribution, 0,
        classProbs[i].length);
      System.arraycopy(props[i][attIndex], 0, m_roots[i].m_Props, 0,
        props[i][attIndex].length);

      m_roots[i].m_TotalWeight = totalWeight[i];

      parallelBFElements[i].add(nodeInfo[i]);
    }

    // build a pre-pruned tree
    if (m_PruningStrategy == PRUNING_PREPRUNING) {

      double previousError = Double.MAX_VALUE;
      double currentError = previousError;
      double minError = Double.MAX_VALUE;
      ArrayList<Double> errorList = new ArrayList<Double>();
      // expand all folds one node at a time, measuring the average test
      // error after each expansion, until the stopping rule fires
      while (true) {
        // compute average error
        double expansionError = 0;
        int count = 0;

        for (int i = 0; i < m_numFoldsPruning; i++) {
          Evaluation eval;

          // calculate error rate if only root node
          if (expansion == 0) {
            m_roots[i].m_isLeaf = true;
            eval = new Evaluation(test[i]);
            eval.evaluateModel(m_roots[i], test[i]);
            if (m_UseErrorRate) {
              expansionError += eval.errorRate();
            } else {
              expansionError += eval.rootMeanSquaredError();
            }
            count++;
          }

          // make tree - expand one node at a time
          else {
            // m_roots[i] == null is the sentinel for "this fold's tree is
            // exhausted and cannot be expanded further"
            if (m_roots[i] == null) {
              continue; // if the tree cannot be expanded, go to next fold
            }
            m_roots[i].m_isLeaf = false;
            BFTree nodeToSplit = (BFTree) ((parallelBFElements[i].get(0))
              .get(0));
            if (!m_roots[i].makeTree(parallelBFElements[i], m_roots[i],
              train[i], nodeToSplit.m_SortedIndices, nodeToSplit.m_Weights,
              nodeToSplit.m_Dists, nodeToSplit.m_ClassProbs,
              nodeToSplit.m_TotalWeight, nodeToSplit.m_Props, m_minNumObj,
              m_Heuristic, m_UseGini)) {
              m_roots[i] = null; // cannot be expanded
              continue;
            }
            eval = new Evaluation(test[i]);
            eval.evaluateModel(m_roots[i], test[i]);
            if (m_UseErrorRate) {
              expansionError += eval.errorRate();
            } else {
              expansionError += eval.rootMeanSquaredError();
            }
            count++;
          }
        }

        // no tree can be expanded any more
        if (count == 0) {
          break;
        }

        expansionError /= count;
        errorList.add(new Double(expansionError));
        currentError = expansionError;

        // default stopping rule: stop as soon as the error goes up
        if (!m_UseOneSE) {
          if (currentError > previousError) {
            break;
          }
        }

        // 1SE rule: tolerate increases up to one standard error above the
        // minimum error seen so far before stopping
        else {
          if (expansionError < minError) {
            minError = expansionError;
          }

          if (currentError > previousError) {
            double oneSE = Math.sqrt(minError * (1 - minError)
              / data.numInstances());
            if (currentError > minError + oneSE) {
              break;
            }
          }
        }

        expansion++;
        previousError = currentError;
      }

      // pick the expansion count: either one before the error increase, or
      // (1SE rule) the earliest expansion within one SE of the minimum
      if (!m_UseOneSE) {
        expansion = expansion - 1;
      } else {
        double oneSE = Math.sqrt(minError * (1 - minError)
          / data.numInstances());
        for (int i = 0; i < errorList.size(); i++) {
          double error = (errorList.get(i)).doubleValue();
          if (error <= minError + oneSE) { // && counts[i]>=m_numFoldsPruning/2)
                                           // {
            expansion = i;
            break;
          }
        }
      }
    }

    // build a postpruned tree
    else {
      @SuppressWarnings("unchecked")
      ArrayList<Double>[] modelError = new ArrayList[m_numFoldsPruning];

      // calculate error of each expansion for each fold
      for (int i = 0; i < m_numFoldsPruning; i++) {
        modelError[i] = new ArrayList<Double>();

        // error of the bare root (expansion 0)
        m_roots[i].m_isLeaf = true;
        Evaluation eval = new Evaluation(test[i]);
        eval.evaluateModel(m_roots[i], test[i]);
        double error;
        if (m_UseErrorRate) {
          error = eval.errorRate();
        } else {
          error = eval.rootMeanSquaredError();
        }
        modelError[i].add(new Double(error));

        // grow the fold's tree fully, appending the test error after each
        // expansion to modelError[i] (done inside this makeTree overload)
        m_roots[i].m_isLeaf = false;
        BFTree nodeToSplit = (BFTree) ((parallelBFElements[i].get(0)).get(0));

        m_roots[i].makeTree(parallelBFElements[i], m_roots[i], train[i],
          test[i], modelError[i], nodeToSplit.m_SortedIndices,
          nodeToSplit.m_Weights, nodeToSplit.m_Dists, nodeToSplit.m_ClassProbs,
          nodeToSplit.m_TotalWeight, nodeToSplit.m_Props, m_minNumObj,
          m_Heuristic, m_UseGini, m_UseErrorRate);
        m_roots[i] = null; // fold tree no longer needed; free it
      }

      // find the expansion with minimal error rate
      double minError = Double.MAX_VALUE;

      // folds may reach different depths; average only over folds that
      // actually recorded an error at a given expansion
      int maxExpansion = modelError[0].size();
      for (int i = 1; i < modelError.length; i++) {
        if (modelError[i].size() > maxExpansion) {
          maxExpansion = modelError[i].size();
        }
      }

      double[] error = new double[maxExpansion];
      int[] counts = new int[maxExpansion];
      for (int i = 0; i < maxExpansion; i++) {
        counts[i] = 0;
        error[i] = 0;
        for (int j = 0; j < m_numFoldsPruning; j++) {
          if (i < modelError[j].size()) {
            error[i] += modelError[j].get(i).doubleValue();
            counts[i]++;
          }
        }
        error[i] = error[i] / counts[i]; // average error for each expansion

        if (error[i] < minError) {// && counts[i]>=m_numFoldsPruning/2) {
          minError = error[i];
          expansion = i;
        }
      }

      // the 1 SE rule chosen: take the earliest expansion whose error is
      // within one standard error of the minimum
      if (m_UseOneSE) {
        double oneSE = Math.sqrt(minError * (1 - minError)
          / data.numInstances());
        for (int i = 0; i < maxExpansion; i++) {
          if (error[i] <= minError + oneSE) { // &&
                                              // counts[i]>=m_numFoldsPruning/2)
                                              // {
            expansion = i;
            break;
          }
        }
      }
    }

    // make tree on all data based on the expansion calculated
    // from cross-validation

    // calculate sorted indices, weights and initial class counts
    int[][] prune_sortedIndices = new int[data.numAttributes()][0];
    double[][] prune_weights = new double[data.numAttributes()][0];
    double[] prune_classProbs = new double[data.numClasses()];
    double prune_totalWeight = computeSortedInfo(data, prune_sortedIndices,
      prune_weights, prune_classProbs);

    // compute information of the best split for this node (include split
    // attribute,
    // split value and gini gain)
    double[][][] prune_dists = new double[data.numAttributes()][2][data
      .numClasses()];
    double[][] prune_props = new double[data.numAttributes()][2];
    double[][] prune_totalSubsetWeights = new double[data.numAttributes()][2];
    ArrayList<Object> prune_nodeInfo = computeSplitInfo(this, data,
      prune_sortedIndices, prune_weights, prune_dists, prune_props,
      prune_totalSubsetWeights, m_Heuristic, m_UseGini);

    // add the root node (with its split info) to BestFirstElements
    ArrayList<ArrayList<Object>> BestFirstElements = new ArrayList<ArrayList<Object>>();
    BestFirstElements.add(prune_nodeInfo);

    // grow the final tree, capped at 'expansion' expansions
    int attIndex = ((Attribute) prune_nodeInfo.get(1)).index();
    m_Expansion = 0;
    makeTree(BestFirstElements, data, prune_sortedIndices, prune_weights,
      prune_dists, prune_classProbs, prune_totalWeight, prune_props[attIndex],
      m_minNumObj, m_Heuristic, m_UseGini, expansion);
  }

  /**
   * Recursively build a best-first decision tree. Method for building a
   * Best-First tree for a given number of expansions. preExpansion is -1 means
   * that no expansion is specified (just for a tree without any pruning
   * method). Pre-pruning and post-pruning methods also use this method to build
   * the final tree on all training data based on the expansion calculated from
   * internal cross-validation.
   * 
   * @param BestFirstElements list to store BFTree nodes; each element is an
   *          ArrayList of [node, split attribute, split value/string, gain],
   *          kept sorted descendingly by gain
   * @param data training data
   * @param sortedIndices sorted indices of the instances
   * @param weights weights of the instances
   * @param dists class distributions for each attribute
   * @param classProbs class probabilities of this node
   * @param totalWeight total weight of this node (note if the node can not
   *          split, this value is not calculated.)
   * @param branchProps proportions of two subbranches
   * @param minNumObj minimal number of instances at leaf nodes
   * @param useHeuristic if use heuristic search for nominal attributes in
   *          multi-class problem
   * @param useGini if use Gini index as splitting criterion
   * @param preExpansion the number of expansions the tree to be expanded
   * @throws Exception if something goes wrong
   */
  protected void makeTree(ArrayList<ArrayList<Object>> BestFirstElements,
    Instances data, int[][] sortedIndices, double[][] weights,
    double[][][] dists, double[] classProbs, double totalWeight,
    double[] branchProps, int minNumObj, boolean useHeuristic, boolean useGini,
    int preExpansion) throws Exception {

    // nothing left to expand
    if (BestFirstElements.size() == 0) {
      return;
    }

    // /////////////////////////////////////////////////////////////////////
    // All information about the node to split (the first BestFirst object in
    // BestFirstElements)
    ArrayList<Object> firstElement = BestFirstElements.get(0);

    // split attribute
    Attribute att = (Attribute) firstElement.get(1);

    // info of split value or split string
    // (element 2 is a Double for numeric attributes, a String encoding the
    // nominal value subset otherwise)
    double splitValue = Double.NaN;
    String splitStr = null;
    if (att.isNumeric()) {
      splitValue = ((Double) firstElement.get(2)).doubleValue();
    } else {
      splitStr = ((String) firstElement.get(2)).toString();
    }

    // the best gini gain or information gain of this node
    double gain = ((Double) firstElement.get(3)).doubleValue();
    // /////////////////////////////////////////////////////////////////////

    // first visit to this node: cache its statistics on the node itself so
    // later recursive calls can read them back (m_ClassProbs doubles as the
    // "already initialized" flag)
    if (m_ClassProbs == null) {
      m_SortedIndices = new int[sortedIndices.length][0];
      m_Weights = new double[weights.length][0];
      m_Dists = new double[dists.length][0][0];
      m_ClassProbs = new double[classProbs.length];
      m_Distribution = new double[classProbs.length];
      m_Props = new double[2];

      for (int i = 0; i < m_SortedIndices.length; i++) {
        m_SortedIndices[i] = sortedIndices[i];
        m_Weights[i] = weights[i];
        m_Dists[i] = dists[i];
      }

      System.arraycopy(classProbs, 0, m_ClassProbs, 0, classProbs.length);
      System.arraycopy(classProbs, 0, m_Distribution, 0, classProbs.length);
      System.arraycopy(branchProps, 0, m_Props, 0, m_Props.length);
      m_TotalWeight = totalWeight;
      // normalize counts into probabilities (skip if all-zero)
      if (Utils.sum(m_ClassProbs) != 0) {
        Utils.normalize(m_ClassProbs);
      }
    }

    // If no enough data or this node can not be split, find next node to split.
    if (totalWeight < 2 * minNumObj || branchProps[0] == 0
      || branchProps[1] == 0) {
      // remove the first element
      BestFirstElements.remove(0);

      makeLeaf(data);
      // recurse into the next-best candidate node, if any
      if (BestFirstElements.size() != 0) {
        ArrayList<Object> nextSplitElement = BestFirstElements.get(0);
        BFTree nextSplitNode = (BFTree) nextSplitElement.get(0);
        nextSplitNode.makeTree(BestFirstElements, data,
          nextSplitNode.m_SortedIndices, nextSplitNode.m_Weights,
          nextSplitNode.m_Dists, nextSplitNode.m_ClassProbs,
          nextSplitNode.m_TotalWeight, nextSplitNode.m_Props, minNumObj,
          useHeuristic, useGini, preExpansion);
      }
      return;
    }

    // If gini gain or information gain is 0, make all nodes in the
    // BestFirstElements leaf nodes
    // because these nodes are sorted descendingly according to gini gain or
    // information gain.
    // (namely, gini gain or information gain of all nodes in BestFirstEelements
    // is 0).
    // The same happens when the expansion budget (preExpansion) is used up.
    if (gain == 0 || preExpansion == m_Expansion) {
      for (int i = 0; i < BestFirstElements.size(); i++) {
        ArrayList<Object> element = BestFirstElements.get(i);
        BFTree node = (BFTree) element.get(0);
        node.makeLeaf(data);
      }
      BestFirstElements.clear();
    }

    // gain is not 0
    else {
      // remove the first element
      BestFirstElements.remove(0);

      m_Attribute = att;
      if (m_Attribute.isNumeric()) {
        m_SplitValue = splitValue;
      } else {
        m_SplitString = splitStr;
      }

      int[][][] subsetIndices = new int[2][data.numAttributes()][0];
      double[][][] subsetWeights = new double[2][data.numAttributes()][0];

      // partition this node's instances into the two branches
      splitData(subsetIndices, subsetWeights, m_Attribute, m_SplitValue,
        m_SplitString, sortedIndices, weights, data);

      // If split will generate node(s) which has total weights less than
      // m_minNumObj,
      // do not split.
      int attIndex = att.index();
      if (subsetIndices[0][attIndex].length < minNumObj
        || subsetIndices[1][attIndex].length < minNumObj) {
        makeLeaf(data);
      }

      // split the node
      else {
        m_isLeaf = false;
        m_Attribute = att;

        // if expansion is specified (if pruning method used)
        // count this expansion against the budget (m_Expansion is static,
        // i.e. shared by the whole tree being grown)
        if ((m_PruningStrategy == PRUNING_PREPRUNING)
          || (m_PruningStrategy == PRUNING_POSTPRUNING) || (preExpansion != -1)) {
          m_Expansion++;
        }

        // create the two successor nodes and enqueue their best splits
        makeSuccessors(BestFirstElements, data, subsetIndices, subsetWeights,
          dists, att, useHeuristic, useGini);
      }

      // choose next node to split
      if (BestFirstElements.size() != 0) {
        ArrayList<Object> nextSplitElement = BestFirstElements.get(0);
        BFTree nextSplitNode = (BFTree) nextSplitElement.get(0);
        nextSplitNode.makeTree(BestFirstElements, data,
          nextSplitNode.m_SortedIndices, nextSplitNode.m_Weights,
          nextSplitNode.m_Dists, nextSplitNode.m_ClassProbs,
          nextSplitNode.m_TotalWeight, nextSplitNode.m_Props, minNumObj,
          useHeuristic, useGini, preExpansion);
      }

    }
  }

  /**
   * This method is to find the number of expansions based on internal
   * cross-validation for just pre-pruning. It expands the first BestFirst node
   * in the BestFirstElements if it is expansible, otherwise it looks for the
   * next expansible node. If it finds an expansible node, it expands the node
   * and returns true. (Note it just expands one node at a time.)
   * 
   * @param BestFirstElements list to store BFTree nodes
   * @param root root node of tree in each fold
   * @param train training data
   * @param sortedIndices sorted indices of the instances
   * @param weights weights of the instances
   * @param dists class distributions for each attribute
   * @param classProbs class probabilities of this node
   * @param totalWeight total weight of this node (note if the node can not
   *          split, this value is not calculated.)
   * @param branchProps proportions of two subbranches
   * @param minNumObj minimal number of instances at leaf nodes
   * @param useHeuristic if use heuristic search for nominal attributes in
   *          multi-class problem
   * @param useGini if use Gini index as splitting criterion
   * @return true if expand successfully, otherwise return false (all nodes in
   *         BestFirstElements cannot be expanded).
   * @throws Exception if something goes wrong
   */
  protected boolean makeTree(ArrayList<ArrayList<Object>> BestFirstElements,
    BFTree root, Instances train, int[][] sortedIndices, double[][] weights,
    double[][][] dists, double[] classProbs, double totalWeight,
    double[] branchProps, int minNumObj, boolean useHeuristic, boolean useGini)
    throws Exception {

    if (BestFirstElements.size() == 0) {
      return false;
    }

    // /////////////////////////////////////////////////////////////////////
    // All information about the node to split (first BestFirst object in
    // BestFirstElements): {node, split attribute, split value/subset, gain}
    ArrayList<Object> firstElement = BestFirstElements.get(0);

    // node to split
    BFTree nodeToSplit = (BFTree) firstElement.get(0);

    // split attribute
    Attribute att = (Attribute) firstElement.get(1);

    // info of split value (numeric attribute) or split subset string (nominal)
    double splitValue = Double.NaN;
    String splitStr = null;
    if (att.isNumeric()) {
      splitValue = ((Double) firstElement.get(2)).doubleValue();
    } else {
      splitStr = ((String) firstElement.get(2)).toString();
    }

    // the best gini gain or information gain of this node
    double gain = ((Double) firstElement.get(3)).doubleValue();
    // /////////////////////////////////////////////////////////////////////

    // If there is not enough data to split this node, or one of the branches
    // would be empty, make it a leaf and continue with the next best node.
    if (totalWeight < 2 * minNumObj || branchProps[0] == 0
      || branchProps[1] == 0) {
      // remove the first element
      BestFirstElements.remove(0);
      nodeToSplit.makeLeaf(train);
      if (BestFirstElements.size() == 0) {
        return false;
      }
      BFTree nextNode = (BFTree) BestFirstElements.get(0).get(0);
      return root.makeTree(BestFirstElements, root, train,
        nextNode.m_SortedIndices, nextNode.m_Weights, nextNode.m_Dists,
        nextNode.m_ClassProbs, nextNode.m_TotalWeight, nextNode.m_Props,
        minNumObj, useHeuristic, useGini);
    }

    // If the gain is 0, make all nodes in BestFirstElements leaf nodes: the
    // list is sorted descendingly by gini gain or information gain, so the
    // gain of every remaining node is 0 as well.
    if (gain == 0) {
      for (int i = 0; i < BestFirstElements.size(); i++) {
        ArrayList<Object> element = BestFirstElements.get(i);
        BFTree node = (BFTree) element.get(0);
        node.makeLeaf(train);
      }
      BestFirstElements.clear();
      return false;
    }

    else {
      // remove the first element
      BestFirstElements.remove(0);
      nodeToSplit.m_Attribute = att;
      if (att.isNumeric()) {
        nodeToSplit.m_SplitValue = splitValue;
      } else {
        nodeToSplit.m_SplitString = splitStr;
      }

      int[][][] subsetIndices = new int[2][train.numAttributes()][0];
      double[][][] subsetWeights = new double[2][train.numAttributes()][0];

      splitData(subsetIndices, subsetWeights, nodeToSplit.m_Attribute,
        nodeToSplit.m_SplitValue, nodeToSplit.m_SplitString,
        nodeToSplit.m_SortedIndices, nodeToSplit.m_Weights, train);

      // If the split would generate node(s) which have fewer instances than
      // minNumObj, do not split; try the next best node instead.
      int attIndex = att.index();
      if (subsetIndices[0][attIndex].length < minNumObj
        || subsetIndices[1][attIndex].length < minNumObj) {

        nodeToSplit.makeLeaf(train);
        // FIX: the first element was removed above, so the list may now be
        // empty; fetching element 0 without this guard previously risked an
        // IndexOutOfBoundsException.
        if (BestFirstElements.size() == 0) {
          return false;
        }
        BFTree nextNode = (BFTree) BestFirstElements.get(0).get(0);
        return root.makeTree(BestFirstElements, root, train,
          nextNode.m_SortedIndices, nextNode.m_Weights, nextNode.m_Dists,
          nextNode.m_ClassProbs, nextNode.m_TotalWeight, nextNode.m_Props,
          minNumObj, useHeuristic, useGini);
      }

      // split the node
      else {
        nodeToSplit.m_isLeaf = false;

        nodeToSplit.makeSuccessors(BestFirstElements, train, subsetIndices,
          subsetWeights, dists, nodeToSplit.m_Attribute, useHeuristic, useGini);

        // successors start out as leaves; later expansions may split them
        for (int i = 0; i < 2; i++) {
          nodeToSplit.m_Successors[i].makeLeaf(train);
        }

        return true;
      }
    }
  }

  /**
   * This method is to find the number of expansions based on internal
   * cross-validation for just post-pruning. It expands the first BestFirst node
   * in the BestFirstElements until no node can be split. When building the
   * tree, store error for each temporary tree, namely for each expansion.
   * 
   * @param BestFirstElements list to store BFTree nodes
   * @param root root node of tree in each fold
   * @param train training data in each fold
   * @param test test data in each fold
   * @param modelError list to store error for each expansion in each fold
   * @param sortedIndices sorted indices of the instances
   * @param weights weights of the instances
   * @param dists class distributions for each attribute
   * @param classProbs class probabilities of this node
   * @param totalWeight total weight of this node (note if the node can not
   *          split, this value is not calculated.)
   * @param branchProps proportions of two subbranches
   * @param minNumObj minimal number of instances at leaf nodes
   * @param useHeuristic if use heuristic search for nominal attributes in
   *          multi-class problem
   * @param useGini if use Gini index as splitting criterion
   * @param useErrorRate if use error rate in internal cross-validation
   * @throws Exception if something goes wrong
   */
  protected void makeTree(ArrayList<ArrayList<Object>> BestFirstElements,
    BFTree root, Instances train, Instances test, ArrayList<Double> modelError,
    int[][] sortedIndices, double[][] weights, double[][][] dists,
    double[] classProbs, double totalWeight, double[] branchProps,
    int minNumObj, boolean useHeuristic, boolean useGini, boolean useErrorRate)
    throws Exception {

    if (BestFirstElements.size() == 0) {
      return;
    }

    // /////////////////////////////////////////////////////////////////////
    // All information about the node to split (first BestFirst object in
    // BestFirstElements): {node, split attribute, split value/subset, gain}
    ArrayList<Object> firstElement = BestFirstElements.get(0);

    // split attribute
    Attribute att = (Attribute) firstElement.get(1);

    // info of split value (numeric attribute) or split subset string (nominal)
    double splitValue = Double.NaN;
    String splitStr = null;
    if (att.isNumeric()) {
      splitValue = ((Double) firstElement.get(2)).doubleValue();
    } else {
      splitStr = ((String) firstElement.get(2)).toString();
    }

    // the best gini gain or information gain of this node
    double gain = ((Double) firstElement.get(3)).doubleValue();
    // /////////////////////////////////////////////////////////////////////

    // If there is not enough data to split this node, or one of the branches
    // would be empty, make it a leaf and continue with the next best node.
    if (totalWeight < 2 * minNumObj || branchProps[0] == 0
      || branchProps[1] == 0) {
      // remove the first element
      BestFirstElements.remove(0);
      makeLeaf(train);
      if (BestFirstElements.size() == 0) {
        return;
      }

      BFTree nextSplitNode = (BFTree) BestFirstElements.get(0).get(0);
      nextSplitNode.makeTree(BestFirstElements, root, train, test, modelError,
        nextSplitNode.m_SortedIndices, nextSplitNode.m_Weights,
        nextSplitNode.m_Dists, nextSplitNode.m_ClassProbs,
        nextSplitNode.m_TotalWeight, nextSplitNode.m_Props, minNumObj,
        useHeuristic, useGini, useErrorRate);
      return;

    }

    // If the gain is 0, make all nodes in BestFirstElements leaf nodes: the
    // list is sorted descendingly by gini gain or information gain, so the
    // gain of every remaining node is 0 as well.
    if (gain == 0) {
      for (int i = 0; i < BestFirstElements.size(); i++) {
        ArrayList<Object> element = BestFirstElements.get(i);
        BFTree node = (BFTree) element.get(0);
        node.makeLeaf(train);
      }
      BestFirstElements.clear();
    }

    // gini gain or information gain is not 0
    else {
      // remove the first element
      BestFirstElements.remove(0);
      m_Attribute = att;
      if (att.isNumeric()) {
        m_SplitValue = splitValue;
      } else {
        m_SplitString = splitStr;
      }

      int[][][] subsetIndices = new int[2][train.numAttributes()][0];
      double[][][] subsetWeights = new double[2][train.numAttributes()][0];

      splitData(subsetIndices, subsetWeights, m_Attribute, m_SplitValue,
        m_SplitString, sortedIndices, weights, train);

      // If the split would generate node(s) which have fewer instances than
      // minNumObj, do not split.
      int attIndex = att.index();
      if (subsetIndices[0][attIndex].length < minNumObj
        || subsetIndices[1][attIndex].length < minNumObj) {
        makeLeaf(train);
      }

      // split the node and calculate the error of this temporary tree
      else {
        m_isLeaf = false;
        m_Attribute = att;

        makeSuccessors(BestFirstElements, train, subsetIndices, subsetWeights,
          dists, m_Attribute, useHeuristic, useGini);
        for (int i = 0; i < 2; i++) {
          m_Successors[i].makeLeaf(train);
        }

        // Evaluate the whole temporary tree (rooted at 'root') on the test
        // fold and record its error for this expansion.
        Evaluation eval = new Evaluation(test);
        eval.evaluateModel(root, test);
        double error;
        if (useErrorRate) {
          error = eval.errorRate();
        } else {
          error = eval.rootMeanSquaredError();
        }
        // Double.valueOf avoids the deprecated Double(double) constructor
        modelError.add(Double.valueOf(error));
      }

      if (BestFirstElements.size() != 0) {
        ArrayList<Object> nextSplitElement = BestFirstElements.get(0);
        BFTree nextSplitNode = (BFTree) nextSplitElement.get(0);
        nextSplitNode.makeTree(BestFirstElements, root, train, test,
          modelError, nextSplitNode.m_SortedIndices, nextSplitNode.m_Weights,
          nextSplitNode.m_Dists, nextSplitNode.m_ClassProbs,
          nextSplitNode.m_TotalWeight, nextSplitNode.m_Props, minNumObj,
          useHeuristic, useGini, useErrorRate);
      }
    }
  }

  /**
   * Generate successor nodes for a node and put them into BestFirstElements
   * according to gini gain or information gain in a descending order.
   * 
   * @param BestFirstElements list to store BestFirst nodes
   * @param data training instance
   * @param subsetSortedIndices sorted indices of instances of successor nodes
   * @param subsetWeights weights of instances of successor nodes
   * @param dists class distributions of successor nodes
   * @param att attribute used to split the node
   * @param useHeuristic if use heuristic search for nominal attributes in
   *          multi-class problem
   * @param useGini if use Gini index as splitting criterion
   * @throws Exception if something goes wrong
   */
  protected void makeSuccessors(ArrayList<ArrayList<Object>> BestFirstElements,
    Instances data, int[][][] subsetSortedIndices, double[][][] subsetWeights,
    double[][][] dists, Attribute att, boolean useHeuristic, boolean useGini)
    throws Exception {

    // Two successors: binary split (left = index 0, right = index 1).
    m_Successors = new BFTree[2];

    for (int i = 0; i < 2; i++) {
      m_Successors[i] = new BFTree();
      m_Successors[i].m_isLeaf = true;

      // class probability and distribution for this successor node; both are
      // copied from the parent's per-branch distribution for the split
      // attribute, then m_ClassProbs is normalized to probabilities
      m_Successors[i].m_ClassProbs = new double[data.numClasses()];
      m_Successors[i].m_Distribution = new double[data.numClasses()];
      System.arraycopy(dists[att.index()][i], 0, m_Successors[i].m_ClassProbs,
        0, m_Successors[i].m_ClassProbs.length);
      System.arraycopy(dists[att.index()][i], 0,
        m_Successors[i].m_Distribution, 0,
        m_Successors[i].m_Distribution.length);
      // guard: Utils.normalize throws on an all-zero vector
      if (Utils.sum(m_Successors[i].m_ClassProbs) != 0) {
        Utils.normalize(m_Successors[i].m_ClassProbs);
      }

      // split information for this successor node: computeSplitInfo fills
      // props/subDists/totalSubsetWeights and returns
      // {node, best attribute, split value/subset, gain}
      double[][] props = new double[data.numAttributes()][2];
      double[][][] subDists = new double[data.numAttributes()][2][data
        .numClasses()];
      double[][] totalSubsetWeights = new double[data.numAttributes()][2];
      ArrayList<Object> splitInfo = m_Successors[i].computeSplitInfo(
        m_Successors[i], data, subsetSortedIndices[i], subsetWeights[i],
        subDists, props, totalSubsetWeights, useHeuristic, useGini);

      // branch proportion for this successor node (for its own best split)
      int splitIndex = ((Attribute) splitInfo.get(1)).index();
      m_Successors[i].m_Props = new double[2];
      System.arraycopy(props[splitIndex], 0, m_Successors[i].m_Props, 0,
        m_Successors[i].m_Props.length);

      // sorted indices and weights of each attribute for this successor node
      // (per-attribute arrays are shared with the subset arrays, not copied)
      m_Successors[i].m_SortedIndices = new int[data.numAttributes()][0];
      m_Successors[i].m_Weights = new double[data.numAttributes()][0];
      for (int j = 0; j < m_Successors[i].m_SortedIndices.length; j++) {
        m_Successors[i].m_SortedIndices[j] = subsetSortedIndices[i][j];
        m_Successors[i].m_Weights[j] = subsetWeights[i][j];
      }

      // distribution of each attribute for this successor node
      m_Successors[i].m_Dists = new double[data.numAttributes()][2][data
        .numClasses()];
      for (int j = 0; j < subDists.length; j++) {
        m_Successors[i].m_Dists[j] = subDists[j];
      }

      // total weights for this successor node.
      m_Successors[i].m_TotalWeight = Utils.sum(totalSubsetWeights[splitIndex]);

      // insert this successor node into BestFirstElements so the list stays
      // sorted by gini gain or information gain in descending order
      if (BestFirstElements.size() == 0) {
        BestFirstElements.add(splitInfo);
      } else {
        double gGain = ((Double) (splitInfo.get(3))).doubleValue();
        int vectorSize = BestFirstElements.size();
        ArrayList<Object> lastNode = BestFirstElements.get(vectorSize - 1);

        // If gain is less than that of the last (smallest) node, append;
        // otherwise insert before the first node with a smaller-or-equal gain
        if (gGain < ((Double) (lastNode.get(3))).doubleValue()) {
          BestFirstElements.add(vectorSize, splitInfo);
        } else {
          for (int j = 0; j < vectorSize; j++) {
            ArrayList<Object> node = BestFirstElements.get(j);
            double nodeGain = ((Double) (node.get(3))).doubleValue();
            if (gGain >= nodeGain) {
              BestFirstElements.add(j, splitInfo);
              break;
            }
          }
        }
      }
    }
  }

  /**
   * Compute sorted indices, weights and class probabilities for a given
   * dataset. Return total weights of the data at the node.
   * 
   * @param data training data
   * @param sortedIndices sorted indices of instances at the node
   * @param weights weights of instances at the node
   * @param classProbs class probabilities at the node
   * @return total weights of instances at the node
   * @throws Exception if something goes wrong
   */
  protected double computeSortedInfo(Instances data, int[][] sortedIndices,
    double[][] weights, double[] classProbs) throws Exception {

    int numInsts = data.numInstances();

    // scratch buffer for the values of one numeric attribute
    double[] attVals = new double[numInsts];

    // Fill sortedIndices and weights for every attribute except the class.
    for (int a = 0; a < data.numAttributes(); a++) {
      if (a == data.classIndex()) {
        continue;
      }
      weights[a] = new double[numInsts];

      if (data.attribute(a).isNominal()) {

        // Nominal attribute: keep instances in their original order, but
        // place the indices of instances with missing values at the end.
        sortedIndices[a] = new int[numInsts];
        int pos = 0;
        for (int k = 0; k < numInsts; k++) {
          Instance inst = data.instance(k);
          if (inst.isMissing(a)) {
            continue;
          }
          sortedIndices[a][pos] = k;
          weights[a][pos] = inst.weight();
          pos++;
        }
        for (int k = 0; k < numInsts; k++) {
          Instance inst = data.instance(k);
          if (!inst.isMissing(a)) {
            continue;
          }
          sortedIndices[a][pos] = k;
          weights[a][pos] = inst.weight();
          pos++;
        }
      } else {

        // Numeric attribute: sort by value; Utils.sort() places instances
        // with missing values at the end.
        for (int k = 0; k < numInsts; k++) {
          attVals[k] = data.instance(k).value(a);
        }
        sortedIndices[a] = Utils.sort(attVals);
        for (int k = 0; k < numInsts; k++) {
          weights[a][k] = data.instance(sortedIndices[a][k]).weight();
        }
      }
    }

    // Accumulate the class counts and the total weight of the data.
    double total = 0;
    for (int k = 0; k < numInsts; k++) {
      Instance inst = data.instance(k);
      classProbs[(int) inst.classValue()] += inst.weight();
      total += inst.weight();
    }

    return total;
  }

  /**
   * Compute the best splitting attribute, split point or subset and the best
   * gini gain or information gain for a given dataset.
   * 
   * @param node node to be split
   * @param data training data
   * @param sortedIndices sorted indices of the instances
   * @param weights weights of the instances
   * @param dists class distributions for each attribute
   * @param props proportions of two branches
   * @param totalSubsetWeights total weight of two subsets
   * @param useHeuristic if use heuristic search for nominal attributes in
   *          multi-class problem
   * @param useGini if use Gini index as splitting criterion
   * @return split information about the node
   * @throws Exception if something is wrong
   */
  protected ArrayList<Object> computeSplitInfo(BFTree node, Instances data,
    int[][] sortedIndices, double[][] weights, double[][][] dists,
    double[][] props, double[][] totalSubsetWeights, boolean useHeuristic,
    boolean useGini) throws Exception {

    double[] splits = new double[data.numAttributes()];
    String[] splitString = new String[data.numAttributes()];
    double[] gains = new double[data.numAttributes()];

    // Compute the best binary split and its gain for every attribute
    // except the class attribute.
    for (int i = 0; i < data.numAttributes(); i++) {
      if (i == data.classIndex()) {
        continue;
      }
      Attribute att = data.attribute(i);
      if (att.isNumeric()) {
        // numeric attribute: best split point
        splits[i] = numericDistribution(props, dists, att, sortedIndices[i],
          weights[i], totalSubsetWeights, gains, data, useGini);
      } else {
        // nominal attribute: best subset of attribute values
        splitString[i] = nominalDistribution(props, dists, att,
          sortedIndices[i], weights[i], totalSubsetWeights, gains, data,
          useHeuristic, useGini);
      }
    }

    // The attribute with the largest gain wins.
    int index = Utils.maxIndex(gains);
    double mBestGain = gains[index];

    Attribute att = data.attribute(index);
    double mValue = Double.NaN;
    String mString = null;
    if (att.isNumeric()) {
      mValue = splits[index];
    } else {
      mString = splitString[index];
      if (mString == null) {
        mString = "";
      }
    }

    // split information: {node, attribute, split value/subset, gain}
    ArrayList<Object> splitInfo = new ArrayList<Object>();
    splitInfo.add(node);
    splitInfo.add(att);
    if (att.isNumeric()) {
      // Double.valueOf avoids the deprecated Double(double) constructor
      splitInfo.add(Double.valueOf(mValue));
    } else {
      splitInfo.add(mString);
    }
    splitInfo.add(Double.valueOf(mBestGain));

    return splitInfo;
  }

  /**
   * Compute distributions, proportions and total weights of two successor nodes
   * for a given numeric attribute.
   * 
   * @param props proportions of each two branches for each attribute
   * @param dists class distributions of two branches for each attribute
   * @param att numeric att split on
   * @param sortedIndices sorted indices of instances for the attribute
   * @param weights weights of instances for the attribute
   * @param subsetWeights total weight of two branches split based on the
   *          attribute
   * @param gains Gini gains or information gains for each attribute
   * @param data training instances
   * @param useGini if use Gini index as splitting criterion
   * @return Gini gain or information gain for the given attribute
   * @throws Exception if something goes wrong
   */
  protected double numericDistribution(double[][] props, double[][][] dists,
    Attribute att, int[] sortedIndices, double[] weights,
    double[][] subsetWeights, double[] gains, Instances data, boolean useGini)
    throws Exception {

    double splitPoint = Double.NaN;
    double[][] dist = null;
    int numClasses = data.numClasses();
    int i; // differ instances with or without missing values

    // currDist tracks the candidate split: [0] = left branch, [1] = right
    double[][] currDist = new double[2][numClasses];
    dist = new double[2][numClasses];

    // Move all instances without missing values into second subset;
    // parentDist accumulates the full class distribution at this node.
    // missingStart ends up as the first index (in sortedIndices) of an
    // instance with a missing value (missing values are sorted to the end).
    double[] parentDist = new double[numClasses];
    int missingStart = 0;
    for (int j = 0; j < sortedIndices.length; j++) {
      Instance inst = data.instance(sortedIndices[j]);
      if (!inst.isMissing(att)) {
        missingStart++;
        currDist[1][(int) inst.classValue()] += weights[j];
      }
      parentDist[(int) inst.classValue()] += weights[j];
    }
    System.arraycopy(currDist[1], 0, dist[1], 0, dist[1].length);

    // Try all possible split points (midpoints between distinct values)
    double currSplit = data.instance(sortedIndices[0]).value(att);
    double currGain;
    double bestGain = -Double.MAX_VALUE;

    for (i = 0; i < sortedIndices.length; i++) {
      Instance inst = data.instance(sortedIndices[i]);
      if (inst.isMissing(att)) {
        break; // remaining instances all have missing values
      }
      // only evaluate a split when the attribute value actually changes
      if (inst.value(att) > currSplit) {

        double[][] tempDist = new double[2][numClasses];
        for (int k = 0; k < 2; k++) {
          // tempDist[k] = currDist[k];
          System.arraycopy(currDist[k], 0, tempDist[k], 0, tempDist[k].length);
        }

        // branch proportions among instances with known values
        double[] tempProps = new double[2];
        for (int k = 0; k < 2; k++) {
          tempProps[k] = Utils.sum(tempDist[k]);
        }

        // guard: Utils.normalize throws on an all-zero vector
        if (Utils.sum(tempProps) != 0) {
          Utils.normalize(tempProps);
        }

        // split missing values: distribute each missing-value instance
        // fractionally to both branches, proportional to the branch sizes
        int index = missingStart;
        while (index < sortedIndices.length) {
          Instance insta = data.instance(sortedIndices[index]);
          for (int j = 0; j < 2; j++) {
            tempDist[j][(int) insta.classValue()] += tempProps[j]
              * weights[index];
          }
          index++;
        }

        if (useGini) {
          currGain = computeGiniGain(parentDist, tempDist);
        } else {
          currGain = computeInfoGain(parentDist, tempDist);
        }

        if (currGain > bestGain) {
          bestGain = currGain;
          // clean split point: midpoint rounded to 5 decimal places
          splitPoint = Math.rint((inst.value(att) + currSplit) / 2.0 * 100000) / 100000.0;
          for (int j = 0; j < currDist.length; j++) {
            System.arraycopy(tempDist[j], 0, dist[j], 0, dist[j].length);
          }
        }
      }
      // move this instance from the right branch to the left one
      currSplit = inst.value(att);
      currDist[0][(int) inst.classValue()] += weights[i];
      currDist[1][(int) inst.classValue()] -= weights[i];
    }

    // Compute weights (branch proportions for the best split)
    int attIndex = att.index();
    props[attIndex] = new double[2];
    for (int k = 0; k < 2; k++) {
      props[attIndex][k] = Utils.sum(dist[k]);
    }
    if (Utils.sum(props[attIndex]) != 0) {
      Utils.normalize(props[attIndex]);
    }

    // Compute subset weights (total weight of each branch)
    subsetWeights[attIndex] = new double[2];
    for (int j = 0; j < 2; j++) {
      subsetWeights[attIndex][j] += Utils.sum(dist[j]);
    }

    // clean gain: rounded to 7 decimal places
    gains[attIndex] = Math.rint(bestGain * 10000000) / 10000000.0;
    dists[attIndex] = dist;
    return splitPoint;
  }

  /**
   * Compute distributions, proportions and total weights of two successor nodes
   * for a given nominal attribute.
   * 
   * @param props proportions of each two branches for each attribute
   * @param dists class distributions of two branches for each attribute
   * @param att nominal attribute split on
   * @param sortedIndices sorted indices of instances for the attribute
   * @param weights weights of instances for the attribute
   * @param subsetWeights total weight of two branches split based on the
   *          attribute
   * @param gains Gini gains for each attribute
   * @param data training instances
   * @param useHeuristic if use heuristic search
   * @param useGini if use Gini index as splitting criterion
   * @return Gini gain for the given attribute
   * @throws Exception if something goes wrong
   */
  protected String nominalDistribution(double[][] props, double[][][] dists,
    Attribute att, int[] sortedIndices, double[] weights,
    double[][] subsetWeights, double[] gains, Instances data,
    boolean useHeuristic, boolean useGini) throws Exception {

    String[] values = new String[att.numValues()];
    int numCat = values.length; // number of values of the attribute
    int numClasses = data.numClasses();

    String bestSplitString = "";
    double bestGain = -Double.MAX_VALUE;

    // class frequency for each value
    int[] classFreq = new int[numCat];
    for (int j = 0; j < numCat; j++) {
      classFreq[j] = 0;
    }

    double[] parentDist = new double[numClasses];
    double[][] currDist = new double[2][numClasses];
    double[][] dist = new double[2][numClasses];
    int missingStart = 0;

    for (int i = 0; i < sortedIndices.length; i++) {
      Instance inst = data.instance(sortedIndices[i]);
      if (!inst.isMissing(att)) {
        missingStart++;
        classFreq[(int) inst.value(att)]++;
      }
      parentDist[(int) inst.classValue()] += weights[i];
    }

    // count the number of values that class frequency is not 0
    int nonEmpty = 0;
    for (int j = 0; j < numCat; j++) {
      if (classFreq[j] != 0) {
        nonEmpty++;
      }
    }

    // attribute values which class frequency is not 0
    String[] nonEmptyValues = new String[nonEmpty];
    int nonEmptyIndex = 0;
    for (int j = 0; j < numCat; j++) {
      if (classFreq[j] != 0) {
        nonEmptyValues[nonEmptyIndex] = att.value(j);
        nonEmptyIndex++;
      }
    }

    // attribute values which class frequency is 0
    int empty = numCat - nonEmpty;
    String[] emptyValues = new String[empty];
    int emptyIndex = 0;
    for (int j = 0; j < numCat; j++) {
      if (classFreq[j] == 0) {
        emptyValues[emptyIndex] = att.value(j);
        emptyIndex++;
      }
    }

    if (nonEmpty <= 1) {
      gains[att.index()] = 0;
      return "";
    }

    // for tow-class probloms
    if (data.numClasses() == 2) {

      // // Firstly, for attribute values which class frequency is not zero

      // probability of class 0 for each attribute value
      double[] pClass0 = new double[nonEmpty];
      // class distribution for each attribute value
      double[][] valDist = new double[nonEmpty][2];

      for (int j = 0; j < nonEmpty; j++) {
        for (int k = 0; k < 2; k++) {
          valDist[j][k] = 0;
        }
      }

      for (int sortedIndice : sortedIndices) {
        Instance inst = data.instance(sortedIndice);
        if (inst.isMissing(att)) {
          break;
        }

        for (int j = 0; j < nonEmpty; j++) {
          if (att.value((int) inst.value(att)).compareTo(nonEmptyValues[j]) == 0) {
            valDist[j][(int) inst.classValue()] += inst.weight();
            break;
          }
        }
      }

      for (int j = 0; j < nonEmpty; j++) {
        double distSum = Utils.sum(valDist[j]);
        if (distSum == 0) {
          pClass0[j] = 0;
        } else {
          pClass0[j] = valDist[j][0] / distSum;
        }
      }

      // sort category according to the probability of class 0.0
      String[] sortedValues = new String[nonEmpty];
      for (int j = 0; j < nonEmpty; j++) {
        sortedValues[j] = nonEmptyValues[Utils.minIndex(pClass0)];
        pClass0[Utils.minIndex(pClass0)] = Double.MAX_VALUE;
      }

      // Find a subset of attribute values that maximize impurity decrease

      // for the attribute values that class frequency is not 0
      String tempStr = "";

      for (int j = 0; j < nonEmpty - 1; j++) {
        currDist = new double[2][numClasses];
        if (tempStr == "") {
          tempStr = "(" + sortedValues[j] + ")";
        } else {
          tempStr += "|" + "(" + sortedValues[j] + ")";
        }
        // System.out.println(sortedValues[j]);
        for (int i = 0; i < sortedIndices.length; i++) {
          Instance inst = data.instance(sortedIndices[i]);
          if (inst.isMissing(att)) {
            break;
          }

          if (tempStr.indexOf("(" + att.value((int) inst.value(att)) + ")") != -1) {
            currDist[0][(int) inst.classValue()] += weights[i];
          } else {
            currDist[1][(int) inst.classValue()] += weights[i];
          }
        }

        double[][] tempDist = new double[2][numClasses];
        for (int kk = 0; kk < 2; kk++) {
          tempDist[kk] = currDist[kk];
        }

        double[] tempProps = new double[2];
        for (int kk = 0; kk < 2; kk++) {
          tempProps[kk] = Utils.sum(tempDist[kk]);
        }

        if (Utils.sum(tempProps) != 0) {
          Utils.normalize(tempProps);
        }

        // split missing values
        int mstart = missingStart;
        while (mstart < sortedIndices.length) {
          Instance insta = data.instance(sortedIndices[mstart]);
          for (int jj = 0; jj < 2; jj++) {
            tempDist[jj][(int) insta.classValue()] += tempProps[jj]
              * weights[mstart];
          }
          mstart++;
        }

        double currGain;
        if (useGini) {
          currGain = computeGiniGain(parentDist, tempDist);
        } else {
          currGain = computeInfoGain(parentDist, tempDist);
        }

        if (currGain > bestGain) {
          bestGain = currGain;
          bestSplitString = tempStr;
          for (int jj = 0; jj < 2; jj++) {
            System.arraycopy(tempDist[jj], 0, dist[jj], 0, dist[jj].length);
          }
        }
      }
    }

    // multi-class problems (exhaustive search)
    else if (!useHeuristic || nonEmpty <= 4) {
      // else if (!useHeuristic || nonEmpty==2) {

      // Firstly, for attribute values which class frequency is not zero
      for (int i = 0; i < (int) Math.pow(2, nonEmpty - 1); i++) {
        String tempStr = "";
        currDist = new double[2][numClasses];
        int mod;
        int bit10 = i;
        for (int j = nonEmpty - 1; j >= 0; j--) {
          mod = bit10 % 2; // convert from 10bit to 2bit
          if (mod == 1) {
            if (tempStr == "") {
              tempStr = "(" + nonEmptyValues[j] + ")";
            } else {
              tempStr += "|" + "(" + nonEmptyValues[j] + ")";
            }
          }
          bit10 = bit10 / 2;
        }
        for (int j = 0; j < sortedIndices.length; j++) {
          Instance inst = data.instance(sortedIndices[j]);
          if (inst.isMissing(att)) {
            break;
          }

          if (tempStr.indexOf("(" + att.value((int) inst.value(att)) + ")") != -1) {
            currDist[0][(int) inst.classValue()] += weights[j];
          } else {
            currDist[1][(int) inst.classValue()] += weights[j];
          }
        }

        double[][] tempDist = new double[2][numClasses];
        for (int k = 0; k < 2; k++) {
          tempDist[k] = currDist[k];
        }

        double[] tempProps = new double[2];
        for (int k = 0; k < 2; k++) {
          tempProps[k] = Utils.sum(tempDist[k]);
        }

        if (Utils.sum(tempProps) != 0) {
          Utils.normalize(tempProps);
        }

        // split missing values
        int index = missingStart;
        while (index < sortedIndices.length) {
          Instance insta = data.instance(sortedIndices[index]);
          for (int j = 0; j < 2; j++) {
            tempDist[j][(int) insta.classValue()] += tempProps[j]
              * weights[index];
          }
          index++;
        }

        double currGain;
        if (useGini) {
          currGain = computeGiniGain(parentDist, tempDist);
        } else {
          currGain = computeInfoGain(parentDist, tempDist);
        }

        if (currGain > bestGain) {
          bestGain = currGain;
          bestSplitString = tempStr;
          for (int j = 0; j < 2; j++) {
            // dist[jj] = new double[currDist[jj].length];
            System.arraycopy(tempDist[j], 0, dist[j], 0, dist[j].length);
          }
        }
      }
    }

    // huristic method to solve multi-classes problems
    else {
      // Firstly, for attribute values which class frequency is not zero
      int n = nonEmpty;
      int k = data.numClasses(); // number of classes of the data
      double[][] P = new double[n][k]; // class probability matrix
      int[] numInstancesValue = new int[n]; // number of instances for an
                                            // attribute value
      double[] meanClass = new double[k]; // vector of mean class probability
      int numInstances = data.numInstances(); // total number of instances

      // initialize the vector of mean class probability
      for (int j = 0; j < meanClass.length; j++) {
        meanClass[j] = 0;
      }

      for (int j = 0; j < numInstances; j++) {
        Instance inst = data.instance(j);
        int valueIndex = 0; // attribute value index in nonEmptyValues
        for (int i = 0; i < nonEmpty; i++) {
          if (att.value((int) inst.value(att)).compareToIgnoreCase(
            nonEmptyValues[i]) == 0) {
            valueIndex = i;
            break;
          }
        }
        P[valueIndex][(int) inst.classValue()]++;
        numInstancesValue[valueIndex]++;
        meanClass[(int) inst.classValue()]++;
      }

      // calculate the class probability matrix
      for (int i = 0; i < P.length; i++) {
        for (int j = 0; j < P[0].length; j++) {
          if (numInstancesValue[i] == 0) {
            P[i][j] = 0;
          } else {
            P[i][j] /= numInstancesValue[i];
          }
        }
      }

      // calculate the vector of mean class probability
      for (int i = 0; i < meanClass.length; i++) {
        meanClass[i] /= numInstances;
      }

      // calculate the covariance matrix
      double[][] covariance = new double[k][k];
      for (int i1 = 0; i1 < k; i1++) {
        for (int i2 = 0; i2 < k; i2++) {
          double element = 0;
          for (int j = 0; j < n; j++) {
            element += (P[j][i2] - meanClass[i2]) * (P[j][i1] - meanClass[i1])
              * numInstancesValue[j];
          }
          covariance[i1][i2] = element;
        }
      }

      Matrix matrix = new Matrix(covariance);
      weka.core.matrix.EigenvalueDecomposition eigen = new weka.core.matrix.EigenvalueDecomposition(
        matrix);
      double[] eigenValues = eigen.getRealEigenvalues();

      // find index of the largest eigenvalue
      int index = 0;
      double largest = eigenValues[0];
      for (int i = 1; i < eigenValues.length; i++) {
        if (eigenValues[i] > largest) {
          index = i;
          largest = eigenValues[i];
        }
      }

      // calculate the first principle component
      double[] FPC = new double[k];
      Matrix eigenVector = eigen.getV();
      double[][] vectorArray = eigenVector.getArray();
      for (int i = 0; i < FPC.length; i++) {
        FPC[i] = vectorArray[i][index];
      }

      // calculate the first principle component scores
      double[] Sa = new double[n];
      for (int i = 0; i < Sa.length; i++) {
        Sa[i] = 0;
        for (int j = 0; j < k; j++) {
          Sa[i] += FPC[j] * P[i][j];
        }
      }

      // sort category according to Sa(s)
      double[] pCopy = new double[n];
      System.arraycopy(Sa, 0, pCopy, 0, n);
      String[] sortedValues = new String[n];
      Arrays.sort(Sa);

      for (int j = 0; j < n; j++) {
        sortedValues[j] = nonEmptyValues[Utils.minIndex(pCopy)];
        pCopy[Utils.minIndex(pCopy)] = Double.MAX_VALUE;
      }

      // for the attribute values that class frequency is not 0
      String tempStr = "";

      for (int j = 0; j < nonEmpty - 1; j++) {
        currDist = new double[2][numClasses];
        if (tempStr == "") {
          tempStr = "(" + sortedValues[j] + ")";
        } else {
          tempStr += "|" + "(" + sortedValues[j] + ")";
        }
        for (int i = 0; i < sortedIndices.length; i++) {
          Instance inst = data.instance(sortedIndices[i]);
          if (inst.isMissing(att)) {
            break;
          }

          if (tempStr.indexOf("(" + att.value((int) inst.value(att)) + ")") != -1) {
            currDist[0][(int) inst.classValue()] += weights[i];
          } else {
            currDist[1][(int) inst.classValue()] += weights[i];
          }
        }

        double[][] tempDist = new double[2][numClasses];
        for (int kk = 0; kk < 2; kk++) {
          tempDist[kk] = currDist[kk];
        }

        double[] tempProps = new double[2];
        for (int kk = 0; kk < 2; kk++) {
          tempProps[kk] = Utils.sum(tempDist[kk]);
        }

        if (Utils.sum(tempProps) != 0) {
          Utils.normalize(tempProps);
        }

        // split missing values
        int mstart = missingStart;
        while (mstart < sortedIndices.length) {
          Instance insta = data.instance(sortedIndices[mstart]);
          for (int jj = 0; jj < 2; jj++) {
            tempDist[jj][(int) insta.classValue()] += tempProps[jj]
              * weights[mstart];
          }
          mstart++;
        }

        double currGain;
        if (useGini) {
          currGain = computeGiniGain(parentDist, tempDist);
        } else {
          currGain = computeInfoGain(parentDist, tempDist);
        }

        if (currGain > bestGain) {
          bestGain = currGain;
          bestSplitString = tempStr;
          for (int jj = 0; jj < 2; jj++) {
            // dist[jj] = new double[currDist[jj].length];
            System.arraycopy(tempDist[jj], 0, dist[jj], 0, dist[jj].length);
          }
        }
      }
    }

    // Compute weights
    int attIndex = att.index();
    props[attIndex] = new double[2];
    for (int k = 0; k < 2; k++) {
      props[attIndex][k] = Utils.sum(dist[k]);
    }
    if (!(Utils.sum(props[attIndex]) > 0)) {
      for (int k = 0; k < props[attIndex].length; k++) {
        props[attIndex][k] = 1.0 / props[attIndex].length;
      }
    } else {
      Utils.normalize(props[attIndex]);
    }

    // Compute subset weights
    subsetWeights[attIndex] = new double[2];
    for (int j = 0; j < 2; j++) {
      subsetWeights[attIndex][j] += Utils.sum(dist[j]);
    }

    // Then, for the attribute values that class frequency is 0, split it into
    // the
    // most frequent branch
    for (int j = 0; j < empty; j++) {
      if (props[attIndex][0] >= props[attIndex][1]) {
        if (bestSplitString == "") {
          bestSplitString = "(" + emptyValues[j] + ")";
        } else {
          bestSplitString += "|" + "(" + emptyValues[j] + ")";
        }
      }
    }

    // clean gain
    gains[attIndex] = Math.rint(bestGain * 10000000) / 10000000.0;

    dists[attIndex] = dist;
    return bestSplitString;
  }

  /**
   * Splits data into two subsets and stores the sorted indices and weights of
   * the instances that go to each of the two successor nodes. Instances with a
   * missing value for the split attribute are sent down BOTH branches with
   * their weight multiplied by the corresponding entry of m_Props (the method
   * of 'fractional' instances).
   * 
   * @param subsetIndices sorted indices of instances for each attribute for the
   *          two successor nodes (filled in by this method)
   * @param subsetWeights weights of instances for each attribute for the two
   *          successor nodes (filled in by this method)
   * @param att attribute the split is based on
   * @param splitPoint split point the split is based on if att is numeric
   * @param splitStr split subset the split is based on if att is nominal
   * @param sortedIndices sorted indices of the instances to be split
   * @param weights weights of the instances to be split
   * @param data training data
   * @throws Exception if something goes wrong
   */
  protected void splitData(int[][][] subsetIndices, double[][][] subsetWeights,
    Attribute att, double splitPoint, String splitStr, int[][] sortedIndices,
    double[][] weights, Instances data) throws Exception {

    int j;
    // For each attribute (the class attribute is skipped)
    for (int i = 0; i < data.numAttributes(); i++) {
      if (i == data.classIndex()) {
        continue;
      }
      // num[k] counts how many slots have been filled for branch k
      int[] num = new int[2];
      for (int k = 0; k < 2; k++) {
        subsetIndices[k][i] = new int[sortedIndices[i].length];
        subsetWeights[k][i] = new double[weights[i].length];
      }

      for (j = 0; j < sortedIndices[i].length; j++) {
        Instance inst = data.instance(sortedIndices[i][j]);
        if (inst.isMissing(att)) {
          // Split instance up: send it down both branches, weighted by the
          // branch proportions in m_Props ('fractional' instances)
          for (int k = 0; k < 2; k++) {
            if (m_Props[k] > 0) {
              subsetIndices[k][i][num[k]] = sortedIndices[i][j];
              subsetWeights[k][i][num[k]] = m_Props[k] * weights[i][j];
              num[k]++;
            }
          }
        } else {
          int subset;
          if (att.isNumeric()) {
            // left branch (0) for values strictly below the split point
            subset = (inst.value(att) < splitPoint) ? 0 : 1;
          } else { // nominal attribute
            // left branch (0) if the instance's value is listed in the split
            // string, which has the form "(value1)|(value2)|..."
            if (splitStr.indexOf("(" + att.value((int) inst.value(att.index()))
              + ")") != -1) {
              subset = 0;
            } else {
              subset = 1;
            }
          }
          subsetIndices[subset][i][num[subset]] = sortedIndices[i][j];
          subsetWeights[subset][i][num[subset]] = weights[i][j];
          num[subset]++;
        }
      }

      // Trim arrays down to the number of entries actually used
      for (int k = 0; k < 2; k++) {
        int[] copy = new int[num[k]];
        System.arraycopy(subsetIndices[k][i], 0, copy, 0, num[k]);
        subsetIndices[k][i] = copy;
        double[] copyWeights = new double[num[k]];
        System.arraycopy(subsetWeights[k][i], 0, copyWeights, 0, num[k]);
        subsetWeights[k][i] = copyWeights;
      }
    }
  }

  /**
   * Computes the Gini gain of a binary split: the Gini index of the parent
   * node minus the weighted Gini indices of the two successor nodes.
   * 
   * @param parentDist class distribution of the parent node
   * @param childDist class distributions of the two successor nodes
   * @return the Gini gain (0 if the parent distribution is empty)
   */
  protected double computeGiniGain(double[] parentDist, double[][] childDist) {
    double total = Utils.sum(parentDist);
    if (total == 0) {
      return 0;
    }

    // start from the parent's impurity and subtract each branch's weighted
    // impurity
    double gain = computeGini(parentDist, total);
    for (int i = 0; i < 2; i++) {
      double branchWeight = Utils.sum(childDist[i]);
      gain -= branchWeight / total * computeGini(childDist[i], branchWeight);
    }
    return gain;
  }

  /**
   * Computes the Gini index for a given class distribution.
   * 
   * @param dist class distribution
   * @param total sum of the class distribution
   * @return the Gini index of the distribution (0 if total is 0)
   */
  protected double computeGini(double[] dist, double total) {
    if (total == 0) {
      return 0;
    }
    // Gini = 1 - sum of squared class probabilities
    double sumSquared = 0;
    for (int i = 0; i < dist.length; i++) {
      double p = dist[i] / total;
      sumSquared += p * p;
    }
    return 1 - sumSquared;
  }

  /**
   * Computes the information gain of a binary split: the entropy of the parent
   * node minus the weighted entropies of the two successor nodes.
   * 
   * @param parentDist class distribution of the parent node
   * @param childDist class distributions of the two successor nodes
   * @return the information gain (0 if the parent distribution is empty)
   */
  protected double computeInfoGain(double[] parentDist, double[][] childDist) {
    double total = Utils.sum(parentDist);
    if (total == 0) {
      return 0;
    }

    // start from the parent's entropy and subtract each branch's weighted
    // entropy
    double gain = computeEntropy(parentDist, total);
    for (int i = 0; i < 2; i++) {
      double branchWeight = Utils.sum(childDist[i]);
      gain -= branchWeight / total * computeEntropy(childDist[i], branchWeight);
    }
    return gain;
  }

  /**
   * Computes the entropy (in bits) for a given class distribution.
   * 
   * @param dist class distribution
   * @param total sum of the class distribution
   * @return the entropy of the distribution (0 if total is 0)
   */
  protected double computeEntropy(double[] dist, double total) {
    if (total == 0) {
      return 0;
    }
    double result = 0;
    for (int i = 0; i < dist.length; i++) {
      // empty classes contribute nothing (and would produce log2(0))
      if (dist[i] != 0) {
        result -= dist[i] / total * Utils.log2(dist[i] / total);
      }
    }
    return result;
  }

  /**
   * Turns this node into a leaf node: clears the split attribute and assigns
   * the majority class as the leaf's class value.
   * 
   * @param data training data (only used to obtain the class attribute)
   */
  protected void makeLeaf(Instances data) {
    m_Attribute = null;
    m_isLeaf = true;
    // predict the class with the highest estimated probability
    m_ClassValue = Utils.maxIndex(m_ClassProbs);
    m_ClassAttribute = data.classAttribute();
  }

  /**
   * Computes class probabilities for an instance by passing it down the
   * decision tree. If the split value is missing, the distributions of all
   * successors are combined, weighted by the branch proportions.
   * 
   * @param instance the instance for which class probabilities is to be
   *          computed
   * @return the class probabilities for the given instance
   * @throws Exception if something goes wrong
   */
  @Override
  public double[] distributionForInstance(Instance instance) throws Exception {
    // leaf node: simply return the stored class probabilities
    if (m_isLeaf) {
      return m_ClassProbs;
    }

    // value of the split attribute is missing: blend the successors'
    // predictions using the branch proportions m_Props
    if (instance.isMissing(m_Attribute)) {
      double[] combined = new double[m_ClassProbs.length];

      for (int i = 0; i < m_Successors.length; i++) {
        double[] branchDist = m_Successors[i].distributionForInstance(instance);
        if (branchDist != null) {
          for (int j = 0; j < branchDist.length; j++) {
            combined[j] += m_Props[i] * branchDist[j];
          }
        }
      }
      return combined;
    }

    // otherwise follow the single branch the instance belongs to
    int branch;
    if (m_Attribute.isNominal()) {
      // left branch (0) if the value is listed in the split string
      branch = (m_SplitString.indexOf("("
        + m_Attribute.value((int) instance.value(m_Attribute)) + ")") != -1)
        ? 0 : 1;
    } else {
      // numeric attribute: left branch (0) for values below the split value
      branch = (instance.value(m_Attribute) < m_SplitValue) ? 0 : 1;
    }
    return m_Successors[branch].distributionForInstance(instance);
  }

  /**
   * Prints the decision tree using the protected toString method from below.
   * 
   * @return a textual description of the classifier, or a note that no model
   *         has been built yet
   */
  @Override
  public String toString() {
    if ((m_Distribution == null) && (m_Successors == null)) {
      return "Best-First: No model built yet.";
    }
    StringBuilder buffer = new StringBuilder("Best-First Decision Tree\n");
    buffer.append(toString(0));
    buffer.append("\n\n");
    buffer.append("Size of the Tree: ").append(numNodes());
    buffer.append("\n\n");
    buffer.append("Number of Leaf Nodes: ").append(numLeaves());
    return buffer.toString();
  }

  /**
   * Outputs the subtree rooted at this node, indented according to the given
   * level.
   * 
   * @param level the level at which the tree is to be printed
   * @return a textual description of the tree at the given level
   */
  protected String toString(int level) {
    // StringBuilder instead of StringBuffer: the buffer is purely local, so
    // no synchronization is needed
    StringBuilder text = new StringBuilder();
    // if leaf node
    if (m_Attribute == null) {
      if (Utils.isMissingValue(m_ClassValue)) {
        text.append(": null");
      } else {
        // correct/incorrect instance counts, rounded to two decimals;
        // compute the majority-class index once instead of twice
        int maxIndex = Utils.maxIndex(m_Distribution);
        double correctNum = Math.rint(m_Distribution[maxIndex] * 100) / 100.0;
        double wrongNum = Math
          .rint((Utils.sum(m_Distribution) - m_Distribution[maxIndex]) * 100) / 100.0;
        String str = "(" + correctNum + "/" + wrongNum + ")";
        text.append(": " + m_ClassAttribute.value((int) m_ClassValue) + str);
      }
    } else {
      // internal node: print the split condition of each of the two branches,
      // then recurse into the corresponding successor
      for (int j = 0; j < 2; j++) {
        text.append("\n");
        for (int i = 0; i < level; i++) {
          text.append("|  ");
        }
        if (j == 0) {
          if (m_Attribute.isNumeric()) {
            text.append(m_Attribute.name() + " < " + m_SplitValue);
          } else {
            text.append(m_Attribute.name() + "=" + m_SplitString);
          }
        } else {
          if (m_Attribute.isNumeric()) {
            text.append(m_Attribute.name() + " >= " + m_SplitValue);
          } else {
            text.append(m_Attribute.name() + "!=" + m_SplitString);
          }
        }
        text.append(m_Successors[j].toString(level + 1));
      }
    }
    return text.toString();
  }

  /**
   * Computes the size of the tree, i.e. the total number of internal and leaf
   * nodes in the subtree rooted at this node.
   * 
   * @return size of the tree
   */
  public int numNodes() {
    if (m_isLeaf) {
      return 1;
    }
    // count this node plus every node in the successor subtrees
    int total = 1;
    for (BFTree successor : m_Successors) {
      total += successor.numNodes();
    }
    return total;
  }

  /**
   * Computes the number of leaf nodes in the subtree rooted at this node.
   * 
   * @return number of leaf nodes
   */
  public int numLeaves() {
    if (m_isLeaf) {
      return 1;
    }
    // sum the leaf counts of all successor subtrees
    int total = 0;
    for (BFTree successor : m_Successors) {
      total += successor.numLeaves();
    }
    return total;
  }

  /**
   * Returns an enumeration describing the available options.
   * 
   * @return an enumeration describing the available options.
   */
  @Override
  public Enumeration<Option> listOptions() {

    Vector<Option> result = new Vector<Option>();

    result.addElement(new Option("\tThe pruning strategy.\n" + "\t(default: "
      + new SelectedTag(PRUNING_POSTPRUNING, TAGS_PRUNING) + ")", "P", 1, "-P "
      + Tag.toOptionList(TAGS_PRUNING)));

    result.addElement(new Option(
      "\tThe minimal number of instances at the terminal nodes.\n"
        + "\t(default 2)", "M", 1, "-M <min no>"));

    // -N takes exactly one argument (the number of folds); the previous value
    // of 5 confused the argument count with the option's default value
    result.addElement(new Option("\tThe number of folds used in the pruning.\n"
      + "\t(default 5)", "N", 1, "-N <num folds>"));

    result.addElement(new Option(
      "\tDon't use heuristic search for nominal attributes in multi-class\n"
        + "\tproblem (default yes).\n", "H", 0, "-H"));

    result.addElement(new Option(
      "\tDon't use Gini index for splitting (default yes),\n"
        + "\tif not information is used.", "G", 0, "-G"));

    result.addElement(new Option(
      "\tDon't use error rate in internal cross-validation (default yes), \n"
        + "\tbut root mean squared error.", "R", 0, "-R"));

    result.addElement(new Option(
      "\tUse the 1 SE rule to make pruning decision.\n" + "\t(default no).",
      "A", 0, "-A"));

    // -C takes one argument (the training set size percentage), as parsed by
    // setOptions; it was wrongly declared as a plain flag before
    result.addElement(new Option("\tPercentage of training data size (0-1]\n"
      + "\t(default 1).", "C", 1, "-C <size per>"));

    result.addAll(Collections.list(super.listOptions()));

    return result.elements();
  }

  /**
   * Parses the options for this object.
   * <p/>
   * 
   * <!-- options-start --> Valid options are:
   * <p/>
   * 
   * <pre>
   * -S &lt;num&gt;
   *  Random number seed.
   *  (default 1)
   * </pre>
   * 
   * <pre>
   * -D
   *  If set, classifier is run in debug mode and
   *  may output additional info to the console
   * </pre>
   * 
   * <pre>
   * -P &lt;UNPRUNED|POSTPRUNED|PREPRUNED&gt;
   *  The pruning strategy.
   *  (default: POSTPRUNED)
   * </pre>
   * 
   * <pre>
   * -M &lt;min no&gt;
   *  The minimal number of instances at the terminal nodes.
   *  (default 2)
   * </pre>
   * 
   * <pre>
   * -N &lt;num folds&gt;
   *  The number of folds used in the pruning.
   *  (default 5)
   * </pre>
   * 
   * <pre>
   * -H
   *  Don't use heuristic search for nominal attributes in multi-class
   *  problem (default yes).
   * </pre>
   * 
   * <pre>
   * -G
   *  Don't use Gini index for splitting (default yes),
   *  if not information is used.
   * </pre>
   * 
   * <pre>
   * -R
   *  Don't use error rate in internal cross-validation (default yes), 
   *  but root mean squared error.
   * </pre>
   * 
   * <pre>
   * -A
   *  Use the 1 SE rule to make pruning decision.
   *  (default no).
   * </pre>
   * 
   * <pre>
   * -C
   *  Percentage of training data size (0-1]
   *  (default 1).
   * </pre>
   * 
   * <!-- options-end -->
   * 
   * @param options the options to use
   * @throws Exception if setting of options fails
   */
  @Override
  public void setOptions(String[] options) throws Exception {

    // each option falls back to its documented default when absent
    String optionString = Utils.getOption('M', options);
    if (optionString.length() == 0) {
      setMinNumObj(2);
    } else {
      setMinNumObj(Integer.parseInt(optionString));
    }

    optionString = Utils.getOption('N', options);
    if (optionString.length() == 0) {
      setNumFoldsPruning(5);
    } else {
      setNumFoldsPruning(Integer.parseInt(optionString));
    }

    optionString = Utils.getOption('C', options);
    if (optionString.length() == 0) {
      setSizePer(1);
    } else {
      setSizePer(Double.parseDouble(optionString));
    }

    optionString = Utils.getOption('P', options);
    if (optionString.length() == 0) {
      setPruningStrategy(new SelectedTag(PRUNING_POSTPRUNING, TAGS_PRUNING));
    } else {
      setPruningStrategy(new SelectedTag(optionString, TAGS_PRUNING));
    }

    // the H/G/R flags disable behavior that defaults to on, hence the negation
    setHeuristic(!Utils.getFlag('H', options));
    setUseGini(!Utils.getFlag('G', options));
    setUseErrorRate(!Utils.getFlag('R', options));
    setUseOneSE(Utils.getFlag('A', options));

    super.setOptions(options);

    Utils.checkForRemainingOptions(options);
  }

  /**
   * Gets the current settings of the Classifier.
   * 
   * @return the current settings of the Classifier
   */
  @Override
  public String[] getOptions() {

    Vector<String> options = new Vector<String>();

    options.add("-M");
    options.add(String.valueOf(getMinNumObj()));

    options.add("-N");
    options.add(String.valueOf(getNumFoldsPruning()));

    // the H/G/R flags are emitted when the corresponding feature is disabled
    if (!getHeuristic()) {
      options.add("-H");
    }
    if (!getUseGini()) {
      options.add("-G");
    }
    if (!getUseErrorRate()) {
      options.add("-R");
    }
    if (getUseOneSE()) {
      options.add("-A");
    }

    options.add("-C");
    options.add(String.valueOf(getSizePer()));

    options.add("-P");
    options.add(String.valueOf(getPruningStrategy()));

    Collections.addAll(options, super.getOptions());

    return options.toArray(new String[options.size()]);
  }

  /**
   * Returns an enumeration of the additional measure names.
   * 
   * @return an enumeration of the measure names
   */
  @Override
  public Enumeration<String> enumerateMeasures() {
    // the only additional measure exposed is the tree size
    Vector<String> measures = new Vector<String>();
    measures.addElement("measureTreeSize");
    return measures.elements();
  }

  /**
   * Returns the size of the tree (the number of nodes) as an additional
   * measure.
   * 
   * @return the size of the tree
   */
  public double measureTreeSize() {
    return numNodes();
  }

  /**
   * Returns the value of the named measure.
   * 
   * @param additionalMeasureName the name of the measure to query for its value
   * @return the value of the named measure
   * @throws IllegalArgumentException if the named measure is not supported
   */
  @Override
  public double getMeasure(String additionalMeasureName) {
    // only the tree size measure is supported
    if (additionalMeasureName.equalsIgnoreCase("measureTreeSize")) {
      return measureTreeSize();
    }
    throw new IllegalArgumentException(additionalMeasureName
      + " not supported (Best-First)");
  }

  /**
   * Returns the tip text for the pruningStrategy property.
   * 
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String pruningStrategyTipText() {
    return "Sets the pruning strategy.";
  }

  /**
   * Sets the pruning strategy. Tags that do not belong to the pruning tag
   * group are silently ignored.
   * 
   * @param value the strategy
   */
  public void setPruningStrategy(SelectedTag value) {
    // only accept tags from the pruning tag group
    if (value.getTags() != TAGS_PRUNING) {
      return;
    }
    m_PruningStrategy = value.getSelectedTag().getID();
  }

  /**
   * Gets the pruning strategy.
   * 
   * @return the current strategy, wrapped in a SelectedTag
   */
  public SelectedTag getPruningStrategy() {
    return new SelectedTag(m_PruningStrategy, TAGS_PRUNING);
  }

  /**
   * Returns the tip text for the minNumObj property.
   * 
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String minNumObjTipText() {
    return "Set minimal number of instances at the terminal nodes.";
  }

  /**
   * Sets the minimal number of instances at the terminal nodes.
   * 
   * @param value minimal number of instances at the terminal nodes
   */
  public void setMinNumObj(int value) {
    m_minNumObj = value;
  }

  /**
   * Gets the minimal number of instances at the terminal nodes.
   * 
   * @return minimal number of instances at the terminal nodes
   */
  public int getMinNumObj() {
    return m_minNumObj;
  }

  /**
   * Returns the tip text for the numFoldsPruning property.
   * 
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String numFoldsPruningTipText() {
    return "Number of folds in internal cross-validation.";
  }

  /**
   * Sets the number of folds used in internal cross-validation.
   * 
   * @param value the number of folds
   */
  public void setNumFoldsPruning(int value) {
    m_numFoldsPruning = value;
  }

  /**
   * Gets the number of folds used in internal cross-validation.
   * 
   * @return number of folds in internal cross-validation
   */
  public int getNumFoldsPruning() {
    return m_numFoldsPruning;
  }

  /**
   * Returns the tip text for the heuristic property.
   * 
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui.
   */
  public String heuristicTipText() {
    return "If heuristic search is used for binary split for nominal attributes.";
  }

  /**
   * Sets whether to use heuristic search for nominal attributes in multi-class
   * problems.
   * 
   * @param value if true, use heuristic search for nominal attributes in
   *          multi-class problems
   */
  public void setHeuristic(boolean value) {
    m_Heuristic = value;
  }

  /**
   * Gets whether heuristic search is used for nominal attributes in
   * multi-class problems.
   * 
   * @return true if heuristic search is used for nominal attributes in
   *         multi-class problems
   */
  public boolean getHeuristic() {
    return m_Heuristic;
  }

  /**
   * Returns the tip text for the useGini property.
   * 
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui.
   */
  public String useGiniTipText() {
    return "If true the Gini index is used for splitting criterion, otherwise the information is used.";
  }

  /**
   * Sets whether to use the Gini index as the splitting criterion (otherwise
   * information gain is used).
   * 
   * @param value if true, use the Gini index splitting criterion
   */
  public void setUseGini(boolean value) {
    m_UseGini = value;
  }

  /**
   * Gets whether the Gini index is used as the splitting criterion.
   * 
   * @return true if the Gini index is used as the splitting criterion
   */
  public boolean getUseGini() {
    return m_UseGini;
  }

  /**
   * Returns the tip text for the useErrorRate property.
   * 
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui.
   */
  public String useErrorRateTipText() {
    return "If error rate is used as error estimate. if not, root mean squared error is used.";
  }

  /**
   * Sets whether to use the error rate in internal cross-validation (otherwise
   * the root mean squared error is used).
   * 
   * @param value if true, use the error rate in internal cross-validation
   */
  public void setUseErrorRate(boolean value) {
    m_UseErrorRate = value;
  }

  /**
   * Gets whether the error rate is used in internal cross-validation.
   * 
   * @return true if the error rate is used in internal cross-validation
   */
  public boolean getUseErrorRate() {
    return m_UseErrorRate;
  }

  /**
   * Returns the tip text for the useOneSE property.
   * 
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui.
   */
  public String useOneSETipText() {
    return "Use the 1SE rule to make pruning decision.";
  }

  /**
   * Sets whether to use the 1SE rule to choose the final model.
   * 
   * @param value if true, use the 1SE rule to choose the final model
   */
  public void setUseOneSE(boolean value) {
    m_UseOneSE = value;
  }

  /**
   * Gets whether the 1SE rule is used to choose the final model.
   * 
   * @return true if the 1SE rule is used to choose the final model
   */
  public boolean getUseOneSE() {
    return m_UseOneSE;
  }

  /**
   * Returns the tip text for the sizePer property.
   * 
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui.
   */
  public String sizePerTipText() {
    return "The percentage of the training set size (0-1, 0 not included).";
  }

  /**
   * Sets the percentage of the training set size to use. Values outside the
   * range (0, 1] are rejected with a message on stderr and the previous value
   * is kept.
   * 
   * @param value training set size
   */
  public void setSizePer(double value) {
    // only accept values in (0, 1]
    if ((value > 0) && (value <= 1)) {
      m_SizePer = value;
    } else {
      System.err
        .println("The percentage of the training set size must be in range 0 to 1 "
          + "(0 not included) - ignored!");
    }
  }

  /**
   * Gets the percentage of the training set size to use.
   * 
   * @return training set size
   */
  public double getSizePer() {
    return m_SizePer;
  }

  /**
   * Returns the revision string.
   * 
   * @return the revision extracted from the version-control keyword
   */
  @Override
  public String getRevision() {
    return RevisionUtils.extract("$Revision$");
  }

  /**
   * Main method for running this classifier from the command-line.
   * 
   * @param args the options for the classifier
   */
  public static void main(String[] args) {
    runClassifier(new BFTree(), args);
  }
}
